package com.xs.micro.tool.service.impl;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.TextPage;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.xs.micro.tool.domain.pojo.em.QueryEnterpriseInfoType;
import com.xs.micro.tool.domain.pojo.vo.EnterpriseInfoResultVO;
import com.xs.micro.tool.domain.pojo.vo.QueryEnterpriseInfoParamVO;
import com.xs.micro.tool.service.BaseQueryDataService;
import okhttp3.*;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import org.springframework.util.Assert;

import java.util.List;
import java.util.Map;

/**
 * Enterprise-information query backed by the Shanghai credit site referenced in {@link #PAGE_URL}.
 *
 * <p>Result pages are fetched with HtmlUnit, parsed with Jsoup, and the cells of the result table
 * are mapped to {@link EnterpriseInfoResultVO} instances.
 */
@Service
public class QueryDataShangHaiServiceImpl extends BaseQueryDataService {

    /** URL template; the placeholders are the encoded keyword and the 1-based page number. */
    private static final String PAGE_URL = "https://xyfw.fgw.sh.gov.cn/credit-front/credit/query/?model=tyshxydm&keywords=%s&pageNo=%s";
    /** Offsets (within one table row) of the cells handed to the result constructor, in order. */
    private static final List<Integer> INDEX_LIST = Lists.newArrayList(0, 1, 2);
    /** CSS selector matching every cell of the result table. */
    private static final String RESULT_TAG = "table.table td";
    /** Number of results assumed per page when converting the requested count into pages. */
    private static final int PAGE_SIZE = 10;
    /** Number of {@code td} cells that make up one result row in the flattened cell list. */
    private static final int CELLS_PER_ROW = 5;

    @Override
    public QueryEnterpriseInfoType type() {
        return QueryEnterpriseInfoType.CREDIT_SHANG_HAI;
    }

    /**
     * Fetches result pages for the given keyword and collects one result per complete table row.
     *
     * @param queryParam supplies the search keyword and the maximum number of results wanted
     * @return the collected results, never more than {@code queryParam.getMaxCount()}; empty when
     *         nothing matched or the requested count is not positive
     * @throws Exception if a page cannot be fetched; failed sanity checks on the response surface
     *         as {@link IllegalArgumentException} from {@link Assert#isTrue}
     */
    @Override
    public List<EnterpriseInfoResultVO> query(QueryEnterpriseInfoParamVO queryParam) throws Exception {
        // try-with-resources closes the client on every exit path (a null resource is skipped).
        try (WebClient webClient = createWebClient()) {
            List<EnterpriseInfoResultVO> list = Lists.newArrayList();
            // Encode the keyword so that Chinese characters are safe to embed in the URL.
            String keyword = encode(queryParam.getKeyword());
            int maxCount = queryParam.getMaxCount();
            // Round up so a request for fewer than PAGE_SIZE results (or a non-multiple of it)
            // still fetches the page holding the remainder; written to avoid int overflow.
            int maxPage = maxCount / PAGE_SIZE + (maxCount % PAGE_SIZE > 0 ? 1 : 0);
            for (int p = 0; p < maxPage && list.size() < maxCount; p++) {
                String url = String.format(PAGE_URL, keyword, p + 1);
                Page page = webClient.getPage(url);

                Assert.isTrue(page.getWebResponse().getStatusCode() < 400, "查询异常." + page.getWebResponse().getStatusMessage());
                // Inspect only the path: the query string carries the keyword, which may itself
                // legitimately contain "404" (e.g. as part of a credit code).
                Assert.isTrue(!page.getUrl().getPath().contains("404"), "查询异常.页面未找到");

                String pageHtml = readContent(page);
                Assert.isTrue(StringUtils.isNotBlank(pageHtml), "查询数据失败，请稍候再试");

                Document doc = Jsoup.parse(pageHtml);
                Elements tdList = doc.select(RESULT_TAG);
                if (CollectionUtils.isEmpty(tdList)) {
                    continue;
                }
                for (int i = 0; i < tdList.size() && list.size() < maxCount; i += CELLS_PER_ROW) {
                    EnterpriseInfoResultVO vo = toResult(tdList, i);
                    if (vo == null) {
                        // Trailing cells do not form a full row (e.g. a placeholder row); stop here.
                        break;
                    }
                    list.add(vo);
                }
            }
            return list;
        }
    }

    /**
     * Extracts the textual content of a fetched page regardless of its concrete page type.
     *
     * @param page the page returned by the web client
     * @return the page markup for HTML pages, the text for text pages, otherwise the raw response body
     */
    private static String readContent(Page page) {
        if (page.isHtmlPage()) {
            return ((HtmlPage) page).asXml();
        }
        if (page instanceof TextPage) {
            return ((TextPage) page).getContent();
        }
        // Any other page type: fall back to the raw body instead of failing on a cast.
        return page.getWebResponse().getContentAsString();
    }

    /**
     * Builds one result from the row that starts at {@code rowStart} in the flattened cell list.
     *
     * @param cells    all table cells of the page, in document order
     * @param rowStart index of the first cell of the row
     * @return the result, or {@code null} when the row does not contain every required cell
     */
    private static EnterpriseInfoResultVO toResult(Elements cells, int rowStart) {
        List<String> results = Lists.newArrayList();
        for (Integer index : INDEX_LIST) {
            int cellIndex = rowStart + index;
            if (cellIndex >= cells.size()) {
                return null;
            }
            results.add(cells.get(cellIndex).text());
        }
        return new EnterpriseInfoResultVO(results.get(0), results.get(1), results.get(2));
    }
}
